
# Load the survey data: locate the machine-specific data directory, make it the
# working directory, and read the SPSS portable file into `natsal1`.
library(foreign) # in order to read SPSS files

getwd()

data_file <- "natsal.por" # .por = SPSS portable file

# Known data locations, one per machine this script has been run on. The
# current working directory is listed first so it is only used as a fallback.
# Calling setwd() on every path in turn aborts a sourced script as soon as one
# path is missing, so instead we look for the directory that actually holds
# the data file.
candidate_dirs <- c(
  getwd(),
  "/Users/Desktop",
  "/Users/vicissitude_86/Documents//Abortion/Dataset/spss/spss", # Mac Pro
  "/Users/Aiden/Documents/cambridge_copy/Replicationcourse/Abortion/Dataset/spss/spss", # iMac
  "c:/Users/Aiden/Downloads/Abortion/Abortion/Dataset/spss/spss" # Desktop
)
has_data <- file.exists(file.path(candidate_dirs, data_file))
if (!any(has_data)) {
  stop(
    "Could not find '", data_file, "' in any known data directory: ",
    paste(candidate_dirs, collapse = ", "),
    call. = FALSE
  )
}

# Later entries take precedence, matching the order in which the directories
# used to be set one after another.
setwd(candidate_dirs[max(which(has_data))])
getwd()

natsal1 <- read.spss(data_file, use.value.labels = TRUE, to.data.frame = TRUE)

############################################## Table 2 ###################################################################
#################################### CHI-SQUARE TEST (START)  ############################################################

#### Miscarriage ####
# Cross-tabulate ABORT against MISCARR, keep only respondents who answered
# "yes" or "no" to both questions, and test the association.

# List all the responses (both forms print the same column).
natsal1$MISCARR
natsal1[["MISCARR"]]
table(natsal1$ABORT, natsal1$MISCARR)

View(natsal1$ABORT)

# [, c("ABORT", "MISCARR")] keeps every row and only these two columns.
dataset1 <- natsal1[, c("ABORT", "MISCARR")]

# Row positions holding a yes/no answer, printed for inspection.
which(dataset1$ABORT %in% c("yes", "no"))
which(dataset1$MISCARR %in% c("yes", "no"))

# Filter on ABORT first, then on MISCARR within the already-filtered rows, so
# dataset4 satisfies both conditions. Any other response (or NA) is dropped.
abort_is_yes_no <- dataset1$ABORT %in% c("yes", "no")
dataset3 <- dataset1[abort_is_yes_no, ]
miscarr_is_yes_no <- dataset3$MISCARR %in% c("yes", "no")
dataset4 <- dataset3[miscarr_is_yes_no, ]
table(dataset4)

# Remove the factor levels that no longer occur after filtering, so the
# contingency table contains only the yes/no cells.
dataset4[["ABORT"]] <- droplevels(dataset4[["ABORT"]])
dataset4[["MISCARR"]] <- droplevels(dataset4[["MISCARR"]])
table(dataset4)

chisq.test(table(dataset4))

# Author's note: the chi-square test on the unfiltered table is not
# recommended because some cells are zero, which violates the test's
# assumptions; Fisher's exact test is run on the same table instead.
chisq.test(table(natsal1$ABORT, natsal1$MISCARR))
fisher.test(table(natsal1$ABORT, natsal1$MISCARR))

######## Age at interview (years) ########
# Build DAGE1, a banded version of DAGE (five-year groups from 16 to 44).
#
# NOTE(review): `natsal2` is not created anywhere in the visible part of this
# script (only `natsal1` is loaded) -- confirm where it comes from, or whether
# `natsal1` was intended, before running this section in a fresh session.

natsal2[[9]] # presumably the DAGE column, selected by position -- TODO confirm
table(natsal1$ABORT, natsal1$DAGE)

# Work on a character copy so the band labels can overwrite the raw ages.
natsal2$DAGE1 <- as.character(natsal2$DAGE)

# -1 and 99 are recoded to missing.
natsal2$DAGE1[natsal2$DAGE1 %in% c("-1", "99")] <- NA

# Age bands; the "30-34" label was previously misspelled as "30-24".
# Ages outside 16-44 are not covered by any band and keep their raw value.
age_bands <- list(
  "16-19" = 16:19,
  "20-24" = 20:24,
  "25-29" = 25:29,
  "30-34" = 30:34,
  "35-39" = 35:39,
  "40-44" = 40:44
)
for (band in names(age_bands)) {
  in_band <- natsal2$DAGE1 %in% as.character(age_bands[[band]])
  natsal2$DAGE1[in_band] <- band
}

table(natsal2$DAGE1)
# Check the class of the recoded column (this used to inspect `agecat`, a
# column this section never creates).
class(natsal2$DAGE1)
natsal2$DAGE1 <- as.factor(natsal2$DAGE1)

#### Ethnicity (chi-square) ####
# Chi-square test of ABORT (yes/no) against ETHNIC11 restricted to the
# "black" and "white" categories.
names(natsal1)
natsal1[803] # presumably the ETHNIC11 column, selected by position -- TODO confirm
natsal1[["ETHNIC11"]] # list all the responses (name was misspelled "ETHINIC11", which returned NULL)

table(natsal1$ABORT, natsal1$ETHNIC11)

dataset1 <- natsal1[, c("ABORT", "ETHNIC11")]

# Keep only yes/no answers to ABORT, then drop the now-unused levels.
which(dataset1$ABORT == "yes" | dataset1$ABORT == "no")
dataset3 <- dataset1[which(dataset1$ABORT == "yes" | dataset1$ABORT == "no"), ]
table(dataset3)
dataset3$ABORT <- droplevels(dataset3$ABORT)
table(dataset3)

# Within those rows keep only the "black" and "white" categories.
which(dataset1$ETHNIC11 == "black" | dataset1$ETHNIC11 == "white")
dataset4 <- dataset3[which(dataset3$ETHNIC11 == "black" | dataset3$ETHNIC11 == "white"), ]
dataset4$ETHNIC11 <- droplevels(dataset4$ETHNIC11)
table(dataset4)

# Test the table built above. This previously tested `subset1`, which is not
# defined until a later section, so it either failed or used stale data.
chisq.test(table(dataset4))
#### Ethnicity: combine categories ####
# Collapse some ETHNIC11 categories into combined groups. This overwrites
# natsal1$ETHNIC11 in place, so every later use sees the combined categories.
names(natsal1)
natsal1[803] # presumably the ETHNIC11 column, selected by position -- TODO confirm
natsal1[["ETHNIC11"]] # list all the responses (name was misspelled "ETHINIC11", which returned NULL)

table(natsal1$ABORT, natsal1$ETHNIC11)

table(natsal1$ETHNIC11)

# Convert the factor to character so new labels can be assigned freely.
natsal1$ETHNIC11 <- as.character(natsal1$ETHNIC11)
natsal1$ETHNIC11[natsal1$ETHNIC11 %in% c("pakistani", "bangladeshi")] <-
  "Pakistani and Bangladeshi"
natsal1$ETHNIC11[natsal1$ETHNIC11 %in% c("chinese", "other asian", "other")] <-
  "chinese and all others"
# Back to a factor; as.factor() orders the levels alphabetically.
natsal1$ETHNIC11 <- as.factor(natsal1$ETHNIC11)
table(natsal1$ETHNIC11)

#### Family structure ####
# Chi-square test of ABORT (yes/no) against PARENTS, restricted to the
# "both natural parents/adopted" and "neither natural parent" categories.
# Author's note: "the df are different" (presumably the degrees of freedom
# versus the reference table -- TODO confirm).
natsal1[999] # column selected by position -- NOTE(review): confirm this is PARENTS
natsal1$PARENTS
natsal1[["PARENTS"]]

table(natsal1$ABORT, natsal1$PARENTS)
dataset1 <- natsal1[, c("ABORT", "PARENTS")]

# Keep only yes/no answers to ABORT, then drop the now-unused levels.
which(dataset1$ABORT %in% c("yes", "no"))
abort_is_yes_no <- dataset1$ABORT %in% c("yes", "no")
dataset3 <- dataset1[abort_is_yes_no, ]
table(dataset3)
dataset3[["ABORT"]] <- droplevels(dataset3[["ABORT"]])
table(dataset3)

# Restrict PARENTS to the two categories being compared.
parents_of_interest <- c("both natural parents/adopted", "neither natural parent")
keep_parents <- dataset3$PARENTS %in% parents_of_interest
subset1 <- dataset3[keep_parents, c("ABORT", "PARENTS")]
table(subset1)
subset1[["PARENTS"]] <- droplevels(subset1[["PARENTS"]])
table(subset1)

chisq.test(table(subset1))

#### Sex competency ####
# Chi-square test of ABORT (yes/no) against SEXCOMP, restricted to the
# "not competent" and "competent" categories.

natsal1[999] # column selected by position -- NOTE(review): same index as the family-structure section; confirm
natsal1$SEXCOMP # list all the responses
natsal1[["SEXCOMP"]]

table(natsal1$ABORT, natsal1$SEXCOMP)
dataset1 <- natsal1[, c("ABORT", "SEXCOMP")]

# Keep only yes/no answers to ABORT, then drop the now-unused levels.
which(dataset1$ABORT %in% c("yes", "no"))
abort_is_yes_no <- dataset1$ABORT %in% c("yes", "no")
dataset3 <- dataset1[abort_is_yes_no, ]
table(dataset3)
dataset3[["ABORT"]] <- droplevels(dataset3[["ABORT"]])
table(dataset3)

# Restrict SEXCOMP to the two categories being compared.
keep_sexcomp <- dataset3$SEXCOMP %in% c("not competent", "competent")
subset1 <- dataset3[keep_sexcomp, c("ABORT", "SEXCOMP")]
table(subset1)
subset1[["SEXCOMP"]] <- droplevels(subset1[["SEXCOMP"]])
table(subset1)

chisq.test(table(subset1))

#### Sexual debut ####
# Chi-square test of ABORT (yes/no) against GAFSEX, restricted to the
# "16-17" and "18-19" categories.

natsal1[557] # presumably the GAFSEX column, selected by position -- TODO confirm
natsal1[["GAFSEX"]]
natsal1$GAFSEX

table(natsal1$ABORT, natsal1$GAFSEX)
dataset1 <- natsal1[, c("ABORT", "GAFSEX")]

# Keep only yes/no answers to ABORT, then drop the now-unused levels.
which(dataset1$ABORT == "yes" | dataset1$ABORT == "no")
dataset3 <- dataset1[which(dataset1$ABORT == "yes" | dataset1$ABORT == "no"), ]
table(dataset3)
dataset3$ABORT <- droplevels(dataset3$ABORT)
table(dataset3)

# Subset the ABORT-filtered data (dataset3). This previously subset the
# unfiltered dataset1, so the tested table still contained every non-yes/no
# ABORT level, unlike the other sections.
subset1 <- subset(
  dataset3,
  GAFSEX %in% c("16-17", "18-19"),
  select = c(ABORT, GAFSEX)
)
table(subset1)
subset1$GAFSEX <- droplevels(subset1$GAFSEX)
table(subset1)

chisq.test(table(subset1))

################################################## CHI-SQUARE TEST (END) ###########################################################
